1   /*
2    * Copyright (C) 2012 The Guava Authors
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    * http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package com.google.common.io;
18  
19  import static com.google.common.base.Preconditions.checkNotNull;
20  
21  import com.google.common.annotations.Beta;
22  import com.google.common.base.Ascii;
23  import com.google.common.base.Splitter;
24  import com.google.common.collect.AbstractIterator;
25  import com.google.common.collect.ImmutableList;
26  import com.google.common.collect.Lists;
27  
28  import java.io.BufferedReader;
29  import java.io.IOException;
30  import java.io.Reader;
31  import java.io.Writer;
32  import java.nio.charset.Charset;
33  import java.util.Iterator;
34  import java.util.List;
35  import java.util.regex.Pattern;
36  
37  import javax.annotation.Nullable;
38  
39  /**
40   * A readable source of characters, such as a text file. Unlike a {@link Reader}, a
41   * {@code CharSource} is not an open, stateful stream of characters that can be read and closed.
42   * Instead, it is an immutable <i>supplier</i> of {@code Reader} instances.
43   *
44   * <p>{@code CharSource} provides two kinds of methods:
45   * <ul>
46   *   <li><b>Methods that return a reader:</b> These methods should return a <i>new</i>, independent
47   *   instance each time they are called. The caller is responsible for ensuring that the returned
48   *   reader is closed.
49   *   <li><b>Convenience methods:</b> These are implementations of common operations that are
50   *   typically implemented by opening a reader using one of the methods in the first category,
51   *   doing something and finally closing the reader that was opened.
52   * </ul>
53   *
54   * <p>Several methods in this class, such as {@link #readLines()}, break the contents of the
55   * source into lines. Like {@link BufferedReader}, these methods break lines on any of {@code \n},
56   * {@code \r} or {@code \r\n}, do not include the line separator in each line and do not consider
57   * there to be an empty line at the end if the contents are terminated with a line separator.
58   *
59   * <p>Any {@link ByteSource} containing text encoded with a specific {@linkplain Charset character
60   * encoding} may be viewed as a {@code CharSource} using {@link ByteSource#asCharSource(Charset)}.
61   *
62   * @since 14.0
63   * @author Colin Decker
64   */
65  public abstract class CharSource {
66  
67    /**
68     * Constructor for use by subclasses.
69     */
70    protected CharSource() {}
71  
72    /**
73     * Opens a new {@link Reader} for reading from this source. This method should return a new,
74     * independent reader each time it is called.
75     *
76     * <p>The caller is responsible for ensuring that the returned reader is closed.
77     *
78     * @throws IOException if an I/O error occurs in the process of opening the reader
79     */
80    public abstract Reader openStream() throws IOException;
81  
82    /**
83     * Opens a new {@link BufferedReader} for reading from this source. This method should return a
84     * new, independent reader each time it is called.
85     *
86     * <p>The caller is responsible for ensuring that the returned reader is closed.
87     *
88     * @throws IOException if an I/O error occurs in the process of opening the reader
89     */
90    public BufferedReader openBufferedStream() throws IOException {
91      Reader reader = openStream();
92      return (reader instanceof BufferedReader)
93          ? (BufferedReader) reader
94          : new BufferedReader(reader);
95    }
96  
97    /**
98     * Appends the contents of this source to the given {@link Appendable} (such as a {@link Writer}).
99     * Does not close {@code appendable} if it is {@code Closeable}.
100    *
101    * @throws IOException if an I/O error occurs in the process of reading from this source or
102    *     writing to {@code appendable}
103    */
104   public long copyTo(Appendable appendable) throws IOException {
105     checkNotNull(appendable);
106 
107     Closer closer = Closer.create();
108     try {
109       Reader reader = closer.register(openStream());
110       return CharStreams.copy(reader, appendable);
111     } catch (Throwable e) {
112       throw closer.rethrow(e);
113     } finally {
114       closer.close();
115     }
116   }
117 
118   /**
119    * Copies the contents of this source to the given sink.
120    *
121    * @throws IOException if an I/O error occurs in the process of reading from this source or
122    *     writing to {@code sink}
123    */
124   public long copyTo(CharSink sink) throws IOException {
125     checkNotNull(sink);
126 
127     Closer closer = Closer.create();
128     try {
129       Reader reader = closer.register(openStream());
130       Writer writer = closer.register(sink.openStream());
131       return CharStreams.copy(reader, writer);
132     } catch (Throwable e) {
133       throw closer.rethrow(e);
134     } finally {
135       closer.close();
136     }
137   }
138 
139   /**
140    * Reads the contents of this source as a string.
141    *
142    * @throws IOException if an I/O error occurs in the process of reading from this source
143    */
144   public String read() throws IOException {
145     Closer closer = Closer.create();
146     try {
147       Reader reader = closer.register(openStream());
148       return CharStreams.toString(reader);
149     } catch (Throwable e) {
150       throw closer.rethrow(e);
151     } finally {
152       closer.close();
153     }
154   }
155 
156   /**
157    * Reads the first link of this source as a string. Returns {@code null} if this source is empty.
158    *
159    * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
160    * {@code \r\n}, does not include the line separator in the returned line and does not consider
161    * there to be an extra empty line at the end if the content is terminated with a line separator.
162    *
163    * @throws IOException if an I/O error occurs in the process of reading from this source
164    */
165   public @Nullable String readFirstLine() throws IOException {
166     Closer closer = Closer.create();
167     try {
168       BufferedReader reader = closer.register(openBufferedStream());
169       return reader.readLine();
170     } catch (Throwable e) {
171       throw closer.rethrow(e);
172     } finally {
173       closer.close();
174     }
175   }
176 
177   /**
178    * Reads all the lines of this source as a list of strings. The returned list will be empty if
179    * this source is empty.
180    *
181    * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
182    * {@code \r\n}, does not include the line separator in the returned lines and does not consider
183    * there to be an extra empty line at the end if the content is terminated with a line separator.
184    *
185    * @throws IOException if an I/O error occurs in the process of reading from this source
186    */
187   public ImmutableList<String> readLines() throws IOException {
188     Closer closer = Closer.create();
189     try {
190       BufferedReader reader = closer.register(openBufferedStream());
191       List<String> result = Lists.newArrayList();
192       String line;
193       while ((line = reader.readLine()) != null) {
194         result.add(line);
195       }
196       return ImmutableList.copyOf(result);
197     } catch (Throwable e) {
198       throw closer.rethrow(e);
199     } finally {
200       closer.close();
201     }
202   }
203 
204   /**
205    * Reads lines of text from this source, processing each line as it is read using the given
206    * {@link LineProcessor processor}. Stops when all lines have been processed or the processor
207    * returns {@code false} and returns the result produced by the processor.
208    *
209    * <p>Like {@link BufferedReader}, this method breaks lines on any of {@code \n}, {@code \r} or
210    * {@code \r\n}, does not include the line separator in the lines passed to the {@code processor}
211    * and does not consider there to be an extra empty line at the end if the content is terminated
212    * with a line separator.
213    *
214    * @throws IOException if an I/O error occurs in the process of reading from this source or if
215    *     {@code processor} throws an {@code IOException}
216    * @since 16.0
217    */
218   @Beta
219   public <T> T readLines(LineProcessor<T> processor) throws IOException {
220     checkNotNull(processor);
221 
222     Closer closer = Closer.create();
223     try {
224       Reader reader = closer.register(openStream());
225       return CharStreams.readLines(reader, processor);
226     } catch (Throwable e) {
227       throw closer.rethrow(e);
228     } finally {
229       closer.close();
230     }
231   }
232 
233   /**
234    * Returns whether the source has zero chars. The default implementation is to open a stream and
235    * check for EOF.
236    *
237    * @throws IOException if an I/O error occurs
238    * @since 15.0
239    */
240   public boolean isEmpty() throws IOException {
241     Closer closer = Closer.create();
242     try {
243       Reader reader = closer.register(openStream());
244       return reader.read() == -1;
245     } catch (Throwable e) {
246       throw closer.rethrow(e);
247     } finally {
248       closer.close();
249     }
250   }
251 
252   /**
253    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
254    * the source will contain the concatenated data from the streams of the underlying sources.
255    *
256    * <p>Only one underlying stream will be open at a time. Closing the  concatenated stream will
257    * close the open underlying stream.
258    *
259    * @param sources the sources to concatenate
260    * @return a {@code CharSource} containing the concatenated data
261    * @since 15.0
262    */
263   public static CharSource concat(Iterable<? extends CharSource> sources) {
264     return new ConcatenatedCharSource(sources);
265   }
266 
267   /**
268    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
269    * the source will contain the concatenated data from the streams of the underlying sources.
270    *
271    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
272    * close the open underlying stream.
273    *
274    * <p>Note: The input {@code Iterator} will be copied to an {@code ImmutableList} when this
275    * method is called. This will fail if the iterator is infinite and may cause problems if the
276    * iterator eagerly fetches data for each source when iterated (rather than producing sources
277    * that only load data through their streams). Prefer using the {@link #concat(Iterable)}
278    * overload if possible.
279    *
280    * @param sources the sources to concatenate
281    * @return a {@code CharSource} containing the concatenated data
282    * @throws NullPointerException if any of {@code sources} is {@code null}
283    * @since 15.0
284    */
285   public static CharSource concat(Iterator<? extends CharSource> sources) {
286     return concat(ImmutableList.copyOf(sources));
287   }
288 
289   /**
290    * Concatenates multiple {@link CharSource} instances into a single source. Streams returned from
291    * the source will contain the concatenated data from the streams of the underlying sources.
292    *
293    * <p>Only one underlying stream will be open at a time. Closing the concatenated stream will
294    * close the open underlying stream.
295    *
296    * @param sources the sources to concatenate
297    * @return a {@code CharSource} containing the concatenated data
298    * @throws NullPointerException if any of {@code sources} is {@code null}
299    * @since 15.0
300    */
301   public static CharSource concat(CharSource... sources) {
302     return concat(ImmutableList.copyOf(sources));
303   }
304 
305   /**
306    * Returns a view of the given character sequence as a {@link CharSource}. The behavior of the
307    * returned {@code CharSource} and any {@code Reader} instances created by it is unspecified if
308    * the {@code charSequence} is mutated while it is being read, so don't do that.
309    *
310    * @since 15.0 (since 14.0 as {@code CharStreams.asCharSource(String)})
311    */
312   public static CharSource wrap(CharSequence charSequence) {
313     return new CharSequenceCharSource(charSequence);
314   }
315 
316   /**
317    * Returns an immutable {@link CharSource} that contains no characters.
318    *
319    * @since 15.0
320    */
321   public static CharSource empty() {
322     return EmptyCharSource.INSTANCE;
323   }
324 
325   private static class CharSequenceCharSource extends CharSource {
326 
327     private static final Splitter LINE_SPLITTER
328         = Splitter.on(Pattern.compile("\r\n|\n|\r"));
329 
330     private final CharSequence seq;
331 
332     protected CharSequenceCharSource(CharSequence seq) {
333       this.seq = checkNotNull(seq);
334     }
335 
336     @Override
337     public Reader openStream() {
338       return new CharSequenceReader(seq);
339     }
340 
341     @Override
342     public String read() {
343       return seq.toString();
344     }
345 
346     @Override
347     public boolean isEmpty() {
348       return seq.length() == 0;
349     }
350 
351     /**
352      * Returns an iterable over the lines in the string. If the string ends in
353      * a newline, a final empty string is not included to match the behavior of
354      * BufferedReader/LineReader.readLine().
355      */
356     private Iterable<String> lines() {
357       return new Iterable<String>() {
358         @Override
359         public Iterator<String> iterator() {
360           return new AbstractIterator<String>() {
361             Iterator<String> lines = LINE_SPLITTER.split(seq).iterator();
362 
363             @Override
364             protected String computeNext() {
365               if (lines.hasNext()) {
366                 String next = lines.next();
367                 // skip last line if it's empty
368                 if (lines.hasNext() || !next.isEmpty()) {
369                   return next;
370                 }
371               }
372               return endOfData();
373             }
374           };
375         }
376       };
377     }
378 
379     @Override
380     public String readFirstLine() {
381       Iterator<String> lines = lines().iterator();
382       return lines.hasNext() ? lines.next() : null;
383     }
384 
385     @Override
386     public ImmutableList<String> readLines() {
387       return ImmutableList.copyOf(lines());
388     }
389 
390     @Override
391     public <T> T readLines(LineProcessor<T> processor) throws IOException {
392       for (String line : lines()) {
393         if (!processor.processLine(line)) {
394           break;
395         }
396       }
397       return processor.getResult();
398     }
399 
400     @Override
401     public String toString() {
402       return "CharSource.wrap(" + Ascii.truncate(seq, 30, "...") + ")";
403     }
404   }
405 
406   private static final class EmptyCharSource extends CharSequenceCharSource {
407 
408     private static final EmptyCharSource INSTANCE = new EmptyCharSource();
409 
410     private EmptyCharSource() {
411       super("");
412     }
413 
414     @Override
415     public String toString() {
416       return "CharSource.empty()";
417     }
418   }
419 
420   private static final class ConcatenatedCharSource extends CharSource {
421 
422     private final Iterable<? extends CharSource> sources;
423 
424     ConcatenatedCharSource(Iterable<? extends CharSource> sources) {
425       this.sources = checkNotNull(sources);
426     }
427 
428     @Override
429     public Reader openStream() throws IOException {
430       return new MultiReader(sources.iterator());
431     }
432 
433     @Override
434     public boolean isEmpty() throws IOException {
435       for (CharSource source : sources) {
436         if (!source.isEmpty()) {
437           return false;
438         }
439       }
440       return true;
441     }
442 
443     @Override
444     public String toString() {
445       return "CharSource.concat(" + sources + ")";
446     }
447   }
448 }